import os
import sys
import json
import jsonlines


def load_data(file_path):
    """Read the contents of a JSON or JSON Lines file.

    Args:
        file_path: Path to the input file. A name ending in ``.jsonl`` or
            ``.jsonlines`` is read through ``jsonlines``; any other name is
            parsed as a single UTF-8 JSON document.

    Returns:
        For JSON Lines input, a list of the items yielded by the reader.
        Otherwise, whatever ``json.load`` produces for the document.
    """
    is_line_delimited = file_path.endswith((".jsonl", ".jsonlines"))

    if not is_line_delimited:
        with open(file_path, "r", encoding="utf-8") as handle:
            return json.load(handle)

    with jsonlines.open(file_path, mode='r') as reader:
        records = list(reader)
    return records


def _references_allava(image):
    """Return True when an ``image`` field value mentions ``'allava'``.

    Strings are checked by substring. Lists and tuples are checked element
    by element, because a plain ``in`` on a list only matches an element that
    is exactly equal to ``'allava'`` and would miss paths such as
    ``'allava/001.jpg'``. ``None`` and any other value type count as not
    matching, so such records are kept rather than raising ``TypeError``.
    """
    if isinstance(image, str):
        return 'allava' in image
    if isinstance(image, (list, tuple)):
        return any(_references_allava(entry) for entry in image)
    return False


def update_file(file_path):
    """Drop records whose ``image`` field mentions ``'allava'`` and save the rest.

    The input is read with ``load_data``. The surviving records are written
    as an indented UTF-8 JSON array to ``<file_path without extension>_fix.json``
    (also when the input was JSON Lines), and a short summary is printed.

    Args:
        file_path: Path to the input JSON / JSON Lines file. Each record is
            expected to be a dict; its ``image`` value may be missing,
            ``None``, a string, or a list/tuple of strings.

    Returns:
        None.
    """
    data = load_data(file_path)
    original_len = len(data)

    # `.get('image')` yields None for a missing key; the helper treats that
    # (and an explicit null) as "no allava reference".
    filtered = [
        item for item in data
        if not _references_allava(item.get('image'))
    ]
    filtered_len = len(filtered)

    out_path = os.path.splitext(file_path)[0] + "_fix.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(filtered, f, ensure_ascii=False, indent=2)

    print(f"[done] {file_path}")
    print(f"Original: {original_len} | Filtered: {filtered_len} | Removed: {original_len - filtered_len}")
    print(f"Saved to: {out_path}")


if __name__ == "__main__":
    # The first positional argument is the file to filter; without it,
    # show the usage line and exit with a non-zero status.
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python script.py your_file.json/.jsonl")
        sys.exit(1)

    update_file(cli_args[0])
